import os
import csv
import sys
import numpy as np
from os import listdir
import globalStuff as gls
from globalStuff import POW_THRESHOLD_GPU_ACTIVE, list_event_names, max_values_metrics, gpu_info_folder

def avg_above_threshold(values, pIdle):
    """Return the mean of the entries of ``values`` strictly above ``pIdle``.

    ``values`` is compared element-wise against ``pIdle`` and indexed with the
    resulting boolean mask. When no entry exceeds ``pIdle``, ``pIdle`` itself
    is returned instead.
    """
    above = values > pIdle
    return values[above].mean() if above.any() else pIdle

def readIdlePowers(clocks, gpu_name):
    """Load the idle power of every (memory clock, core clock) pair of a GPU.

    The values are read with readISA from
    '<gpu_info_folder>/<gpu_name>/idle_pows_<gpu_name>.txt' and consumed in
    file order: memory levels in the outer loop, the core levels of that
    memory level in the inner loop.

    Returns a float32 array of shape
    (clocks['num_mem_clocks'], max(clocks['num_core_clocks'])); cells that
    have no matching core level keep their initial value of 0.
    """
    table_shape = (clocks['num_mem_clocks'], np.max(clocks['num_core_clocks']))
    idle_powers = np.zeros(table_shape, dtype=np.float32)
    flat_powers = np.asarray(
        readISA("%s/%s/idle_pows_%s.txt" %(gpu_info_folder,gpu_name, gpu_name)))

    # cursor walks the flat list while the two loops walk the clock grid
    cursor = 0
    for mem_idx, _ in enumerate(clocks['mem_clocks']):
        for core_idx, _ in enumerate(clocks['core_clocks'][mem_idx]):
            idle_powers[mem_idx, core_idx] = flat_powers[cursor]
            cursor += 1
    return idle_powers

# Lists all files whose name starts with 'out_power_samples_' (files generated
# by gpuPOWERSAMPLER), opens them and goes through all the samples. For each
# file it computes the average of the power samples above
# idle_power + POW_THRESHOLD_GPU_ACTIVE (or that threshold itself when no
# sample exceeds it).
# Output: Float value with the mean of those per-file averages across all
# files read.
def readPowerBench(data_path, bench, idle_power):
    """Return the average active power of a benchmark over its sample files.

    Every file in '<data_path>/<bench>/' whose name starts with
    'out_power_samples_' is parsed as a ';'-separated CSV. The first two rows
    are skipped and the second column of each remaining row is taken as one
    power sample. Per file, the samples above
    ``idle_power + POW_THRESHOLD_GPU_ACTIVE`` are averaged with
    avg_above_threshold (which falls back to the threshold itself when no
    sample exceeds it).

    Args:
        data_path: folder holding one sub-folder per benchmark.
        bench: name of the benchmark sub-folder.
        idle_power: idle power added to POW_THRESHOLD_GPU_ACTIVE to build the
            activity threshold.

    Returns:
        Mean of the per-file averages.

    Raises:
        SystemExit: with status 1 when the folder holds no power-sample file.
    """
    avg_pows = []
    for file_name in sorted(listdir('%s/%s/' %(data_path, bench))):
        if not file_name.startswith("out_power_samples_"):
            continue
        with open('%s/%s/%s' %(data_path, bench, file_name)) as csvfile:
            rows = csv.reader(csvfile, delimiter=';')
            # Rows 0 and 1 are not samples. Only the power column is needed;
            # the time column was never used by the average.
            pow_samples = [float(row[1]) for row_id, row in enumerate(rows) if row_id > 1]
        pow_samples = np.asarray(pow_samples, dtype=np.float32)
        avg_pows.append(avg_above_threshold(pow_samples, idle_power+POW_THRESHOLD_GPU_ACTIVE))

    if not avg_pows:
        print('Missing power consumption files for %s benchmark' %(bench))
        # A bare sys.exit() reports success (status 0); this is a failure.
        sys.exit(1)
    return np.mean(np.asarray(avg_pows))

# Lists all files whose name starts with 'output_nvprof_' (except the
# 'output_nvprof_metrics_*' ones), opens them and reads the kernel execution times.
# Output: Float value with the total execution time of all kernels, averaged
# across the files read.
def readExecTime(data_path, bench, gpu):
    """Return the total kernel execution time of a benchmark, averaged over files.

    Every file in '<data_path>/<bench>/' whose name starts with
    'output_nvprof_' but not with 'output_nvprof_metrics_' is parsed as a
    ','-separated CSV. Rows 0 to 4 are skipped; of the remaining rows, those
    whose column 6 contains 'CUDA' are ignored and column 2 of the others is
    summed (in float32) to give the total time of that file.

    Args:
        data_path: folder holding one sub-folder per benchmark.
        bench: name of the benchmark sub-folder.
        gpu: unused; kept so existing callers keep working.

    Returns:
        Mean over the files of the per-file total time.

    Raises:
        SystemExit: with status 1 when the folder holds no matching file.
    """
    per_file_totals = []
    for file_name in sorted(listdir('%s/%s/' %(data_path, bench))):
        if not file_name.startswith("output_nvprof_") or file_name.startswith("output_nvprof_metrics_"):
            continue
        with open('%s/%s/%s' %(data_path, bench, file_name)) as csvfile:
            rows = csv.reader(csvfile, delimiter=',')
            # Only the total-time column feeds the result; the call count and
            # per-call columns were parsed before but never used.
            total_time = [float(row[2]) for row_id, row in enumerate(rows)
                          if row_id > 4 and 'CUDA' not in row[6]]
        per_file_totals.append(np.sum(np.asarray(total_time, dtype=np.float32)))

    if not per_file_totals:
        print('Missing execution times for %s benchmark' %(bench))
        # A bare sys.exit() reports success (status 0); this is a failure.
        sys.exit(1)
    return np.mean(np.asarray(per_file_totals))

#Reads the 'outputOccurrences_per_kernel.csv' file (generated by cudaAssemblyReader.py),
# which has the score for the PTX instructions for each kernel (one row per
# kernel; the first column of a row is not an instruction count).
# Output: Array with the number of occurrences of each ISA instruction in the
# kernel code, taken from the last row of the file.
def readInstructionsBench(data_path, bench):
    """Return the instruction-occurrence counts stored for a benchmark.

    Reads '<data_path>/<bench>/outputOccurrences_per_kernel.csv'. The first
    column of a row is skipped and the remaining columns are converted to
    integers. As before, only the last row of the file determines the result;
    blank lines are ignored so a trailing empty line no longer crashes.

    Args:
        data_path: folder holding one sub-folder per benchmark.
        bench: name of the benchmark sub-folder.

    Returns:
        1-D int32 array with one entry per column after the first.

    Raises:
        ValueError: when the file has no non-blank row, or a count in the
            last row is not an integer.
    """
    csv_path = '%s/%s/outputOccurrences_per_kernel.csv' %(data_path, bench)
    last_row = None
    with open(csv_path) as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields an empty list for a blank line
            if row:
                last_row = row
    if last_row is None:
        raise ValueError('No instruction counts found in %s' %(csv_path))
    return np.array([int(count) for count in last_row[1:]], dtype=np.int32)

def readInstructionsSeqReadable(data_path, bench):
    """Return the lines of a benchmark's readable instruction-sequence file.

    Files in '<data_path>/<bench>/' whose name starts with
    'outputSequenceReadable_kernel' are read in sorted order; as before, the
    content of the last matching file is what gets returned.

    Args:
        data_path: folder holding one sub-folder per benchmark.
        bench: name of the benchmark sub-folder.

    Returns:
        List with one string per line of the file, without the line break.

    Raises:
        FileNotFoundError: when the folder holds no matching file.
    """
    sequence_kernel = None
    for file_name in sorted(listdir('%s/%s/' %(data_path, bench))):
        if file_name.startswith("outputSequenceReadable_kernel"):
            with open('%s/%s/%s' %(data_path, bench, file_name)) as f:
                # Strip only the newline: slicing off the last character
                # would truncate a final line that has no trailing newline.
                sequence_kernel = [line.rstrip('\n') for line in f]
    if sequence_kernel is None:
        raise FileNotFoundError(
            'No outputSequenceReadable_kernel* file in %s/%s/' %(data_path, bench))
    return sequence_kernel

def oneHotEncode(integer_encoded, vocabulary_size):
    """Build a one-hot integer vector of length ``vocabulary_size``.

    All entries are 0 except the one at position ``int(integer_encoded)``,
    which is set to 1.
    """
    encoding = np.array([0 for _slot in range(vocabulary_size)])
    hot_position = int(integer_encoded)
    encoding[hot_position] = 1
    return encoding

# Lists all files whose name starts with 'outputSequence_kernel_depend_' (files
# generated by cudaAssemblyReader.py), opens them and goes through the sequence
# of instructions, saving the corresponding index (from the ISA), in sequence_kernel.
# Output: Sequence of the last matching file (in sorted order), starting with
# 0 (BEGIN) and ending with 1 (END).
def readInstructionsSeq(data_path, bench):
    """Return the encoded instruction sequence of a benchmark kernel.

    Files in '<data_path>/<bench>/' whose name starts with
    'outputSequence_kernel_depend_' are read in sorted order; as before, the
    sequence of the last matching file is what gets returned. Each line holds
    fixed-width numeric fields that are packed into a single integer:

        chars 0-2  instruction code (shifted by 3: 0 is BEGIN, 1 END, 2 PAD)
        char  3    state space (destination of the operation)
        chars 4-5  type of the operands/destination
        char  6    number of operands
        char  7    number of instructions between the data dependency
        char  8    type of dependency (core/mem)

    Args:
        data_path: folder holding one sub-folder per benchmark.
        bench: name of the benchmark sub-folder.

    Returns:
        List of ints: 0, then one packed code per line, then 1.

    Raises:
        FileNotFoundError: when the folder holds no matching file.
        ValueError: when a field of a line is not numeric.
    """
    sequence_kernel = None
    for file_name in sorted(listdir('%s/%s/' %(data_path, bench))):
        if not file_name.startswith("outputSequence_kernel_depend_"):
            continue

        # Positional weight of each field: every field is scaled by the
        # product of the sizes (+1) of all the fields packed below it.
        w_dep_dist = gls.dependencies_types + 1
        w_operands = w_dep_dist * (gls.buffer_max_size + 1)
        w_type = w_operands * (gls.max_operands + 1)
        w_space = w_type * (gls.inst_types_size + 1)
        w_inst = w_space * (gls.state_spaces_size + 1)

        sequence_kernel = [0]
        with open('%s/%s/%s' %(data_path, bench, file_name)) as f:
            for line in f:
                inst_code = int(line[0:3]) + 3 # because instruction 0 - BEGIN, 1 - END and 2 - PAD
                mod_inst = int(line[3]) # state space (destination of the operation) [8]
                type_inst = int(line[4:6]) # type of operands/destination [17]
                num_operands = int(line[6]) # number of operands
                dependecy_inst = int(line[7]) # number of instructions between the data dependency
                dependecy_type = int(line[8]) # type of dependency (core/mem)

                sequence_kernel.append(
                    dependecy_type
                    + w_dep_dist * dependecy_inst
                    + w_operands * num_operands
                    + w_type * type_inst
                    + w_space * mod_inst
                    + w_inst * inst_code)
        sequence_kernel.append(1)

    if sequence_kernel is None:
        raise FileNotFoundError(
            'No outputSequence_kernel_depend_* file in %s/%s/' %(data_path, bench))
    return sequence_kernel

def readAggDataFile(file_path, clocks, tdp, gpu):
    """Read '<file_path>/aggregated_dataset_<gpu>.csv' into per-benchmark grids.

    The file holds, for every benchmark, one header row (its first column is
    the benchmark name) followed by
    clocks['num_mem_clocks'] * max(clocks['num_core_clocks']) rows of
    (mem_clock, core_clock, time, pow, energy). The rows fill a
    (memory level, core level) grid in row-major order.

    Returns a dict with:
        'names'           benchmark names
        'time_dvfs'       per benchmark, time grid divided by its value at the
                          default clocks
        'pow_dvfs'        per benchmark, power grid divided by tdp and then by
                          its value at the default clocks
        'energy_dvfs'     per benchmark, energy grid divided by its value at
                          the default clocks
        'time_default'    time of each row measured at the default clocks
        'pow_default'     power of those rows divided by tdp
        'energy_default'  energy of those rows
    """
    dataset_csv = "%s/aggregated_dataset_%s.csv" %(file_path, gpu)

    n_mem_levels = clocks['num_mem_clocks']
    widest_core_level = np.max(clocks['num_core_clocks'])
    grid_shape = (n_mem_levels, widest_core_level)

    # Grid position of the default (memory, core) clock pair
    ref_mem, = np.where(clocks['mem_clocks'] == clocks['default_mem_clock'])
    ref_core, = np.where(clocks['core_clocks'][ref_mem[0]] == clocks['default_core_clock'])

    names = []
    time_dvfs, pow_dvfs, energy_dvfs = [], [], []
    time_default, pow_default, energy_default = [], [], []

    with open(dataset_csv) as csvfile:
        expecting_header = True
        mem_idx = core_idx = 0
        for row in csv.reader(csvfile):
            if expecting_header:
                # Header row of a benchmark: start a fresh set of grids
                expecting_header = False
                mem_idx = core_idx = 0
                names.append(row[0])
                grid_time = np.zeros(grid_shape, dtype=np.float32)
                grid_pow = np.zeros(grid_shape, dtype=np.float32)
                grid_energy = np.zeros(grid_shape, dtype=np.float32)
                continue

            mem_clock, core_clock, time_txt, pow_txt, energy_txt = row
            at_default = (int(core_clock) == clocks['default_core_clock']
                          and int(mem_clock) == clocks['default_mem_clock'])
            time_val = float(time_txt)
            pow_val = float(pow_txt)
            energy_val = float(energy_txt)

            # Keep the raw measurements taken at the default clocks
            if at_default:
                time_default.append(time_val)
                pow_default.append(pow_val / tdp)
                energy_default.append(energy_val)

            grid_time[mem_idx, core_idx] = time_val
            grid_pow[mem_idx, core_idx] = pow_val
            grid_energy[mem_idx, core_idx] = energy_val

            if core_idx != widest_core_level - 1:
                # More core levels left for this memory level
                core_idx += 1
            elif mem_idx != n_mem_levels - 1:
                # Move on to the first core level of the next memory level
                mem_idx += 1
                core_idx = 0
            else:
                # Last cell of the benchmark: turn the grids into scaling
                # factors relative to the default clocks and store them
                expecting_header = True
                grid_time = grid_time / grid_time[ref_mem, ref_core]
                time_dvfs.append(grid_time)
                grid_pow = grid_pow / tdp
                grid_pow = grid_pow / grid_pow[ref_mem, ref_core]
                pow_dvfs.append(grid_pow)
                grid_energy = grid_energy / grid_energy[ref_mem, ref_core]
                energy_dvfs.append(grid_energy)

    return {
        'names': names,
        'time_dvfs': time_dvfs,
        'pow_dvfs': pow_dvfs,
        'energy_dvfs': energy_dvfs,
        'time_default': time_default,
        'pow_default': pow_default,
        'energy_default': energy_default,
    }

def readPCAggDataFile(file_path, list_data, gpu):
    """Add the normalized metric columns of a GPU to ``list_data``.

    Reads '<file_path>/output_allbenchs_metrics_<gpu>.csv'. For every row,
    the value in column i+1 is divided by max_values_metrics[i] and appended
    to list_data[list_event_names[i]]; column 0 is not used. Each of those
    entries of ``list_data`` is reset to an empty list first.

    Returns the same ``list_data`` dict, updated in place.
    """
    list_data.update({metric: [] for metric in list_event_names})

    metrics_csv = "%s/output_allbenchs_metrics_%s.csv" %(file_path, gpu)
    with open(metrics_csv) as csvfile:
        for row in csv.reader(csvfile):
            # metric columns start at 1
            for column, metric in enumerate(list_event_names, start=1):
                scaled = float(row[column]) / max_values_metrics[column - 1]
                list_data[metric].append(scaled)

    return list_data

def getClocksGPU(gpu_name):
    """Collect the memory and core clock levels described for a GPU.

    Reads '<gpu_info_folder>/<gpu_name>/clks_mem_<gpu_name>.txt' and, for each
    memory clock, 'clks_core_<gpu_name>_<mem_clock>.txt' through readISA. In
    every file the last entry is taken as the default clock and the entries
    before it as the available clocks, which are sorted in ascending order.

    Returns a dict with the keys 'mem_clocks', 'core_clocks' (one array per
    memory clock), 'default_core_clock' (taken from the file of the last,
    i.e. highest, memory clock), 'default_mem_clock', 'highest_core_clock',
    'highest_mem_clock', 'num_mem_clocks' and 'num_core_clocks'.
    """
    mem_entries = readISA("%s/%s/clks_mem_%s.txt" %(gpu_info_folder,gpu_name, gpu_name))
    default_mem_clock = int(mem_entries[-1])
    mem_clocks = np.sort(np.asarray(mem_entries[:-1]).astype(int))

    core_clocks = []
    core_counts = []
    for mem_clock in mem_clocks:
        core_entries = readISA("%s/%s/clks_core_%s_%d.txt" %(gpu_info_folder,gpu_name, gpu_name, mem_clock))
        # overwritten on every pass: the value of the last file is kept
        default_core_clock = int(core_entries[-1])
        core_levels = np.sort(np.asarray(core_entries[:-1]).astype(int))
        core_clocks.append(core_levels)
        core_counts.append(len(core_levels))
    num_core_clocks = np.asarray(core_counts).astype(int)

    highest_mem_clock = mem_clocks[-1]
    highest_core_clock = core_clocks[-1][-1]
    return {
        'mem_clocks': mem_clocks,
        'core_clocks': core_clocks,
        'default_core_clock': default_core_clock,
        'default_mem_clock': default_mem_clock,
        'highest_core_clock': highest_core_clock,
        'highest_mem_clock': highest_mem_clock,
        'num_mem_clocks': len(mem_clocks),
        'num_core_clocks': num_core_clocks,
    }

def readDataSet(data_path, gpu_name, tdp, pc):
    """Assemble the full dataset of a GPU.

    Combines the clock description (getClocksGPU), the aggregated
    measurements (readAggDataFile), optionally the metric columns
    (readPCAggDataFile, only when ``pc == True``) and, for every benchmark
    named in the aggregated file, its instruction sequence
    (readInstructionsSeq) and instruction counts (readInstructionsBench).

    Returns a tuple (dataset dict, clocks dict). The dataset dict gains the
    keys 'inst_histo' (int32 array with one row of gls.ISA_size counts per
    benchmark) and 'inst_seq' (list with one sequence per benchmark).
    """
    clocks = getClocksGPU(gpu_name)

    dataset = readAggDataFile(data_path, clocks, tdp, gpu_name)
    if pc == True:
        dataset = readPCAggDataFile(data_path, dataset, gpu_name)

    bench_names = dataset['names']
    inst_histo = np.zeros((len(bench_names), gls.ISA_size), dtype=np.int32)
    inst_seq = []
    for row_idx, bench in enumerate(bench_names):
        inst_seq.append(readInstructionsSeq(data_path, bench))
        inst_histo[row_idx] = readInstructionsBench(data_path, bench)

    dataset['inst_histo'] = inst_histo
    dataset['inst_seq'] = inst_seq
    return dataset, clocks

#Reads the ISA file (or list of benchmarks), with the list of available assembly
# instructions
#Output: list of instructions (or list of benchmarks)
def readISA(filepath):
    """Return the first tab-separated field of every line of a text file.

    Trailing tabs and the line break are removed from each line, the line is
    split on tabs and the first field is stripped of surrounding whitespace.
    A blank line yields an empty string.

    Args:
        filepath: path of the file to read.

    Returns:
        List of strings, one per line of the file.
    """
    # The context manager closes the file even if reading fails part-way.
    with open(filepath) as f:
        return [line.rstrip('\t\n').split('\t')[0].strip() for line in f]

def getBenchmarksAvailable(path, benchs_file, data_path):
    """Return the benchmarks to use, checking that their data is present.

    Args:
        path: folder holding the benchmark-list file.
        benchs_file: name of the file listing the benchmarks (read with
            readISA), or 'all' to use every entry found in ``data_path``.
        data_path: folder holding one entry per benchmark.

    Returns:
        The directory listing of ``data_path`` when ``benchs_file`` is 'all',
        otherwise the list read from '<path>/<benchs_file>'.

    Raises:
        SystemExit: with status 1 when a listed benchmark has no entry in
            ``data_path``.
    """
    ubenchmarks_in_folder = listdir(data_path)
    if benchs_file == 'all':
        return ubenchmarks_in_folder

    ubenchmarks = readISA('%s/%s' %(path, benchs_file))
    # set lookup instead of scanning the directory listing per benchmark
    available = set(ubenchmarks_in_folder)
    for bench in ubenchmarks:
        if bench not in available:
            print('Data for microbenchmark %s is not in given path.' %(bench))
            # A bare sys.exit() reports success (status 0); this is a failure.
            sys.exit(1)
    return ubenchmarks
